## Estimation of a Multinomial Logit Model by Maximum Likelihood using mnlogit and Fixed point subroutine
## by Jose-Alberto Guerra
##    Created   04/2014
##    Modified  30/06/2020
##    Uses: R version 3.1.2 (install old version of R, by holding down the Control key during the launch of RStudio you can use 64-bit 3.1.2 version)
#Proof2Rnew.dta comes from border_reg_weighted4v2.do, notice we have eliminated agricultural
#Proof2RCoordnew.dta is Proof2Rnew but adding the coordinates for objectid_1 points. It comes from TemporyPointsCoordinatesv1.do
#to deal with spatial data visualitaion in R see http://spatial.ly/r/
## 
## Where we guarantee that KS probabilities are within [0,1]
## We guarantee inner loop (fixed point expectations) convergence using tol 10e-10 for the sup-norm stopping criterion
## We follow KS 2012 and KS 2018 using outer loop iterations k=50 and guaranteeing inner loop convergence (see above)
## This file creates the basic datasets we use to estimate the NPL with fixed point subroutine estimation. 
## TO get the dataset related to:
## exercise <- 
## 1. w50list: weighting matrix based on a 50mts distance, run the code as it now stands 
## 2. atleast1: weighting matrix based on a 0mts distance, run the code:
## 3  age1530: If one restricts the sample to only 15 to 30 years old (notice this is different to agecohort1530 estimation reported in the paper)
## 4. age1545: If one restricts the sample to only 15 to 30 years old (notice this is different to agecohort1545 estimation reported in the paper)
## 5. h1: If one keeps  the first half of the sample
## 6. h2: If one keeps the second half of the sample 
## 7. within50All: weighting matrix based on a neighbours living farther away than 50mts distance
###########################################################################
rm(list=ls(all=TRUE))
#########################
####    Preamble     ####
#########################
### Define exercise
exercise <-  "w50list" #"within50All"
##Functions
#codes <- "C:/Users/ja.guerra/Dropbox/NetworkParish2016/Occupational_Choice/Codes/RCodes/EEACodes/"
codes <- "C:/Users/ja.guerra/Dropbox/NetworkParish2016/Occupational_Choice/Submission/Final_ReStat/Codes/C_ANALYSIS/" #codes <- "C:/HPC/CODE/"
source(paste(codes,"Mlogit_NAPPHeteroFunctionsv2.R",sep=""))
###########################################################################
#1. DATA
###########################################################################
###########################################################################

set.seed(12345)
#define WD and DATASET
##If normal data set
## root <- "C:/Users/ja.guerra/Dropbox/NetworkParish2016/Occupational_Choice/" #root <- "C:/HPC/"
#root <- "C:/Users/ja.guerra/Dropbox/NetworkParish2016/Occupational_Choice/" #root <- "C:/HPC/"
root <- "C:/Users/ja.guerra/Dropbox/NetworkParish2016/Occupational_Choice/Submission/Final_ReStat/Codes/" #root <- "C:/HPC/"
mapdata <- paste(root,"MOLA/",sep="")
dataOGR <- paste(root,"MOLA/",sep="")
wddata<-c(paste(root,"Results/ReStat/",sep=""),paste(root,"NAPP/Proof2RCoordnew.dta",sep="")) 
setwd(wddata[1])

sink("Explorenew.txt")

#Calling maps
map <- readShapeSpatial(paste(mapdata,"socialparishpolygonV1_LLPA.shp",sep=""))# however no spatial info


plot(map)
line <- readShapeSpatial(paste(mapdata,"socialparishlineV1_LLPA.shp",sep=""))
linec <- readShapeSpatial(paste(mapdata,"civilparishMOLAline.shp",sep=""))


#calling NAPP household head data
NAPP<-read.dta(wddata[2],convert.factors=FALSE)
index <- with(NAPP, order(social,HHNUMhead)) #to sort the data
NAPP <- NAPP[index,]

#Modifying NAPP
NAPP$Buffer400<-1 #including column with buffer400


#Creating a subset of the sample
NAPPsub <- subset(NAPP,Buffer400==1,select=c(classnew,nchild, stayerp, stayerc, nservant, SEX, married,AGE,
                                             civil,social,ID_socialborder,Buffer400,Buffer100,Buffer80,Buffer60,Buffer50,objectid_1,longitude,latitude,
                                             problem2,problemsocial2))
NAPPsubhot <- subset(NAPP,Buffer400==1,select=c(sh_classnew0_hh, sh_classnew1_hh,sh_classnew2_hh,sh_classnew3_hh,sh_classnew4_hh,sh_classnew5_hh,nchild, 
                                                sh_classnew6_hh, sh_classnew7_hh, mean_stayerp, mean_stayerc, mean_nservant, mean_married, mean_AGE,mean_nchild,
                                             civil,social,ID_socialborder,Buffer400,Buffer100,Buffer80,Buffer60,Buffer50,objectid_1,longitude,latitude,
                                             problem2,problemsocial2))
class <- factor(NAPPsub$classnew) #defining factors based on class 
class.m <- model.matrix(~ -1 + class) #creating columnsdummy vars for each class category
NAPPsub <- cbind(NAPPsub,class.m)
#Creating aggregates up to street points
NAPPpoints <- summaryBy(SEX+AGE+classnew+nchild+nservant+married+stayerp+stayerc+class0+class1+class2+class3+class4+class5+class6+class7+
                          longitude+latitude+problem2+problemsocial2+Buffer400+social+civil~objectid_1, data=NAPPsub, FUN=c(sum,mean), na.rm=TRUE, keep.names=T)
NAPPpointshot <- summaryBy(mean_AGE+sh_classnew0_hh+sh_classnew1_hh+sh_classnew2_hh+sh_classnew3_hh+sh_classnew4_hh+sh_classnew5_hh+nchild+ 
                           sh_classnew6_hh+sh_classnew7_hh+mean_nchild+mean_nservant+mean_married+mean_stayerp+mean_stayerc+
                          longitude+latitude+problem2+problemsocial2+Buffer400+social+civil~objectid_1, data=NAPPsubhot, FUN=c(sum,mean), na.rm=TRUE, keep.names=T)
quant1 <- function(x, ...) { round(quantile(x,prob=.1)[[1]],0)}
#Summary of number of parish members and number of same street--neighbours 
NAPPsocials <- summaryBy(Buffer400.sum ~ social.mean, data=NAPPpoints, FUN=c(quant1,max,median,sum), na.rm=TRUE, keep.names=T)
colnames(NAPPsocials) <- c("IDSocial","minnei","maxnei","mnei","N")
NAPPsocials$mnei <- round(NAPPsocials$mnei,0)
NAPPsocialsall <- NAPPsocials
#subset NAPPSocials with N>1 & N>mnei
NAPPsocials <- NAPPsocials[NAPPsocials$N>1 & NAPPsocials$N > NAPPsocials$minnei,]
save(NAPPsocials,file="NAPPdistrneiNnew.RData")

#Summary of 
NAPPsocialcivil <- summaryBy(civil.mean ~ social.mean, data=NAPPpoints, FUN=c(mean), na.rm=TRUE, keep.names=T)
colnames(NAPPsocialcivil) <- c("IDSocial","IDCivil")
NAPPsocialcivil <- ddply(NAPPsocialcivil,.(IDCivil),transform,nsocials=length(IDSocial))

NAPPsocialcivilnei <- merge(NAPPsocialcivil,NAPPsocialsall,by="IDSocial", all.x=T, sort=T) 

#Merging NAPP data on the Map
map@data <- merge(map@data,NAPPsocialcivilnei, by="IDSocial", all.x=T, sort=T) 


#################################
## Creating some maps to export#
## descriptive statistics####
#################################

#To define colours palettes http://spatialanalysis.co.uk/wp-content/uploads/2010/09/Maps_R_Final.pdf
colours <-brewer.pal(5, "Spectral") #"Blues" #see display.brewer.pal(7,"GnBu")
colours <- unlist(sapply(c(1:length(colours)),function(x) colours[length(colours)-x+1]))

#Number of social parishes per civil parish
brks<- c(1,2, 3, 5, 10, 15)# classIntervals(map@data$nsocials, n=5, style="quantile")
plot(brks, col=colours)

cairo_ps(paste("MapCivilNsocialsnew.eps",sep=""), width=7, height=7) #to keep transparency from the intervals
plot(map, col=colours[findInterval(map@data$nsocials, brks,all.inside=TRUE)], axes=F)
plot(linec,lty=3,lwd=2,col="gray",add=TRUE)
box()
#title(paste ("London 1881 number of social parishes per civil"))
legend("bottomleft", legend=leglabs(brks), fill=colours, bty="n", bg = "gray90",cex=.75)
dev.off()

c.n.2s <- unique(map@data[map@data$nsocials<2,]$IDCivil) #IDCivil such that there is only 1 social parish
c.n.2s <- c.n.2s[!is.na(c.n.2s)]

#Number of parish members per social parish
brks1 <- c(17,30,210,630,1120,3000)
cairo_ps(paste("MapCivilNnew.eps",sep=""), width=7, height=7) #to keep transparency from the intervals
plot(map, col=colours[findInterval(map@data$N, brks1,all.inside=TRUE)], axes=F)
plot(linec,lty=3,lwd=2,col="gray",add=TRUE)
box()
#title(paste ("London 1881 number of members per social"))
legend("bottomleft", legend=leglabs(brks1), fill=colours, bty="n", bg = "gray90",cex=.75)
dev.off()

#Number of median neighbours per social parish
brks2 <- classIntervals(map@data[map@data$N>= 30,]$mnei, n=5, style="quantile",na.rm=TRUE)
brks2 <- brks2$brks
cairo_ps(paste("MapCivilmneinew.eps",sep=""), width=7, height=7) #to keep transparency from the intervals
plot(map, col=colours[findInterval(map@data$mnei, brks2,all.inside=TRUE)], axes=F)
plot(linec,lty=3,lwd=2,col="gray",add=TRUE)
box()
#title(paste ("London 1881 number of median street neighbours per social"))
legend("bottomleft", legend=leglabs(brks2), fill=colours, bty="n", bg = "gray90",cex=.75)
dev.off()

p.n.N <- unique(map@data[map@data$N<30 ,]$IDSocial) #IDSocial such that there is less than 30 obs
p.n.N <- p.n.N[!is.na(p.n.N)]

#Which parishes are the ones we will based our analysis
cairo_ps(paste("MapSocialLeftnew.eps",sep=""), width=7, height=7) #to keep transparency from the intervals
plot(map)
plot(map[map@data$IDCivil %in% c.n.2s | (is.na(map@data$IDCivil) & map@data$IDSocial!=207) | map@data$IDSocial %in% c(p.n.N,23),], add=TRUE, col="gray", lty=3)
#title(paste ("London 1881 social parishes under analysis"))
legend("bottomleft", c("included","not included"), col = c("black","gray"), text.col = "black", lty = c(1,3), pch = c(NA,NA),merge = TRUE, cex=.75)
dev.off()

cairo_ps(paste("MapLinesCivSocnew.eps",sep=""), width=7, height=7) #to keep transparency from the intervals
plot(line,lwd=2,col="gray")
plot(linec,lty=3,lwd=2,col="1",add=TRUE)
legend("bottomleft", c("social border","civil border"), col = c("gray", "black"), text.col = "black", lty = c(1,3), pch = c(NA,NA),merge = TRUE, bg = "gray90",cex=.75)
dev.off()

cairo_ps(paste("MapPointsSubsamplenew.eps",sep=""), width=7, height=7) #to keep transparency from the intervals
plot(line,lwd=2,col="gray")
plot(linec,lty=3,lwd=2,col="1",add=TRUE)
points(NAPPpoints$longitude.mean,NAPPpoints$latitude.mean, pch=20,col="deepskyblue",cex=.7)
points(NAPPpoints[NAPPpoints$problem2.mean==1,]$longitude.mean,NAPPpoints[NAPPpoints$problem2.mean==1,]$latitude.mean,col="blue",pch=20, cex=.65)
points(NAPPpoints[NAPPpoints$problem2.mean==1 & NAPPpoints$problemsocial2.mean>1,]$longitude.mean,NAPPpoints[NAPPpoints$problem2.mean==1 & NAPPpoints$problemsocial2.mean>1,]$latitude.mean,col="red", pch=20, cex=.5)
legend("bottomleft", c("social border","civil border","point original","in border diff civil", "in border wo o social"), col = c("gray", "black", "deepskyblue", "blue", "red"), text.col = "black", lty = c(1,3,rep(NA,3)), pch = c(NA,NA, rep(20,3)),merge = TRUE, bg = "gray90",cex=.75)
dev.off()


#Subsetting points, those to be included in the econometric analysis below
NAPPpointsest <- subset(NAPPpoints,!(civil.mean %in% c.n.2s) & !(social.mean %in% p.n.N) ,select=objectid_1:civil.mean)
NAPPpointsesthot <- subset(NAPPpointshot,!(civil.mean %in% c.n.2s) & !(social.mean %in% p.n.N) ,select=objectid_1:civil.mean)
#plot 
points(NAPPpointsest$longitude.mean,NAPPpointsest$latitude.mean,col="blue", pch=20, cex=.5)

#example: defining colours
#plot an example with class4
nclr <- 5
colours2 <-brewer.pal(nclr,"Spectral" ) # "Blues" #see display.brewer.pal(7,"GnBu")
colours2 <- unlist(sapply(c(1:length(colours2)),function(x) colours2[length(colours2)-x+1]))
brks2 <- classIntervals(NAPPpointsest$class4.mean, nclr, style="quantile",na.rm=TRUE) #style="sd",na.rm=TRUE)
brks2 <- brks2$brks
cairo_ps(paste("MapClass4new.eps",sep=""), width=7, height=7) #to keep transparency from the intervals
plot(line,lwd=2,col="gray")
plot(linec,lty=3,lwd=2,col="1",add=TRUE)
points(NAPPpointsest$longitude.mean,NAPPpointsest$latitude.mean,col=colours2[findInterval(NAPPpointsest$class4.mean, brks2,all.inside=TRUE)],pch=20,cex=.7)
box()
title(paste ("London 1881: Industrial Artisans per streets"))
legend("bottomleft", legend=leglabs(round(brks2,3)), fill=colours2, bty="n", bg = "gray90",cex=.75)
dev.off()
#plot all classes
nclr <- 5
colours2 <-brewer.pal(nclr,"Spectral" ) # "Blues" #see display.brewer.pal(7,"GnBu")
colours2 <- unlist(sapply(c(1:length(colours2)),function(x) colours2[length(colours2)-x+1]))
for(occ in c(paste("class",c(min(NAPP$classnew):max(NAPP$classnew)), ".mean",sep=""))) {
  brks2 <- classIntervals(NAPPpointsest[[occ]], n=nclr, style="quantile",na.rm=TRUE) #style="sd",na.rm=TRUE) style="fisher",na.rm=TRUE)
  brks2 <- brks2$brks
  cairo_ps(paste("Map",occ,"new.eps",sep=""), width=7, height=7) #to keep transparency from the intervals
  plot(line,lwd=2,col="gray")
  plot(linec,lty=3,lwd=2,col="1",add=TRUE)
  points(NAPPpointsest$longitude.mean,NAPPpointsest$latitude.mean,col=colours2[findInterval(NAPPpointsest[[occ]], brks2,all.inside=TRUE)],pch=20,cex=.7)
  box()
  #title(paste("London 1881:",occ, "per streets",sep=""))
  legend("bottomleft", legend=leglabs(round(brks2,3)), fill=colours2, bty="n", bg = "gray90",cex=.75)
  dev.off()
}

#plot an example with class4 but with sample weighted by social parish
nclr <- 5
colours2 <-brewer.pal(nclr,"Spectral" ) # "Blues" #see display.brewer.pal(7,"GnBu")
colours2 <- unlist(sapply(c(1:length(colours2)),function(x) colours2[length(colours2)-x+1]))
brks2 <- classIntervals(NAPPpointsesthot$sh_classnew4_hh.mean, nclr, style="quantile",na.rm=TRUE) #style="fisher" style="sd",na.rm=TRUE)
brks2 <- brks2$brks
cairo_ps(paste("MapshClass4new.eps",sep=""), width=7, height=7) #to keep transparency from the intervals
plot(line,lwd=2,col="gray")
plot(linec,lty=3,lwd=2,col="1",add=TRUE)
points(NAPPpointsesthot$longitude.mean,NAPPpointsesthot$latitude.mean,col=colours2[findInterval(NAPPpointsesthot$sh_classnew4_hh.mean, brks2,all.inside=TRUE)],pch=20,cex=.7)
box()
title(paste ("London 1881: Industrial Artisans per streets"))
legend("bottomleft", legend=leglabs(round(brks2,3)), fill=colours2, bty="n", bg = "gray90",cex=.75)
dev.off()
#plot all classes but with sample weighted by social parish
nclr <- 5
colours2 <-brewer.pal(nclr,"Spectral" ) # "Blues" #see display.brewer.pal(7,"GnBu")
colours2 <- unlist(sapply(c(1:length(colours2)),function(x) colours2[length(colours2)-x+1]))
for(occ in c(paste("sh_classnew",c(min(NAPP$classnew):max(NAPP$classnew)), "_hh.mean",sep=""))) {
  brks2 <- classIntervals(NAPPpointsesthot[[occ]], n=nclr, style="quantile",na.rm=TRUE) #style="quantile", style="fisher"
  brks2 <- brks2$brks
  cairo_ps(paste("Map",occ,".eps",sep=""), width=7, height=7) #to keep transparency from the intervals
  plot(line,lwd=2,col="gray")
  plot(linec,lty=3,lwd=2,col="1",add=TRUE)
  points(NAPPpointsesthot$longitude.mean,NAPPpointsesthot$latitude.mean,col=colours2[findInterval(NAPPpointsesthot[[occ]], brks2,all.inside=TRUE)],pch=20,cex=.7)
  box()
  #title(paste("London 1881:",occ,sep=""))
  legend("bottomleft", legend=leglabs(round(brks2,3)), fill=colours2, bty="n", bg = "gray90",cex=.75)
  dev.off()
}
#aggregating them to the social parish level, for the individual street observations, to see differences across parishes
NAPPsocialsest <- summaryBy( SEX.mean+ AGE.mean+nchild.mean+nservant.mean+married.mean+stayerp.mean+stayerc.mean+class0.mean+
                               class1.mean+class2.mean+class3.mean+class4.mean+class5.mean+class6.mean+class7.mean~ social.mean, data=NAPPpointsest, FUN=c(mean), na.rm=TRUE, keep.names=F)
colnames(NAPPsocialsest)<-c("IDSocial", "SEX.point.mean","AGE.point.mean","nchild.point.mean","nservant.point.mean","married.point.mean",
                            "stayerp.point.mean"  ,"stayerc.point.mean"  ,"class0.point.mean"   ,"class1.point.mean"   ,"class2.point.mean",
                            "class3.point.mean","class4.point.mean","class5.point.mean","class6.point.mean","class7.point.mean"  )
mapest <- map[!(map@data$IDCivil %in% c.n.2s | (is.na(map@data$IDCivil) & map@data$IDSocial!=207) | map@data$IDSocial %in% c(p.n.N,23)),]
plot(mapest)
mapest@data <- merge(mapest@data,NAPPsocialsest, by="IDSocial", all.x=T, sort=T) #or sort=F??

colours <-brewer.pal(5, "Spectral") #"Blues" #see display.brewer.pal(7,"GnBu")
colours <- unlist(sapply(c(1:length(colours)),function(x) colours[length(colours)-x+1]))

for(occ in c(colnames(NAPPsocialsest)[2:ncol(NAPPsocialsest)])[-1]) { #To remove SEX, no variation
  brks2 <- classIntervals(mapest@data[[occ]], n=5, style="quantile",na.rm=TRUE) #fisher quantile
  brks2 <- brks2$brks
  cairo_ps(paste("MapSocialLeftnew",strsplit(occ,".point.")[[1]][1],".eps",sep=""), width=7, height=7) #to keep transparency from the intervals
  plot(mapest, col=colours[findInterval(mapest@data[[occ]], brks2,all.inside=TRUE)], axes=F)
  plot(linec,lty=3,lwd=3,col="gray",add=TRUE)
  box()
  #title(paste("London 1881 ",strsplit(occ,".point.")[[1]][1],sep=""))
  legend("bottomleft", legend=leglabs(round(brks2,3)), fill=colours, bty="n", bg = "gray90",cex=.75)
  dev.off()
}
####################
##Data Definition##
####################

##Individual Variables

#subset of pointsobs such that: civil parish != Noscials<2 and Social parishes such that n_p>=30 and not unemployed.
NAPPest<-subset(NAPP,!(civil %in% c.n.2s) & !(social %in% p.n.N) ,select=c(civil, social, HHNUMhead, SEX, AGE, married, nchild, nservant, stayerp, stayerc, classnew))
#write.dta(NAPPest,paste(root,"NAPP/Proof2RCoordSubsamplenew.dta",sep=""))
write.dta(NAPPest,paste(root,"NAPP/ReStat/Proof2RCoordSubsamplenew.dta",sep=""))

###########################################################
# To keep only those with occupations and employed #########
#00.a Benchmark exercise
NAPPest<-read.dta(paste(root,"NAPP/ReStat/Proof2RCoordSubsamplenew.dta",sep=""),convert.factors=FALSE)
NAPPest<-subset(NAPPest,!(classnew == 0 | classnew==2)) # those with occupations
NAPPest <- NAPPest[(ddply(NAPPest,.(social),transform, Nn =length(social))$Nn>=30),]
NAPPest$classnew[NAPPest$classnew==1] <- 2 ##professional is now class 2
NAPPest$classnew <- NAPPest$classnew-2
#write.dta(NAPPest,paste(root,"NAPP/Proof2RCoordSubsamplelast.dta",sep=""))
write.dta(NAPPest,paste(root,"NAPP/ReStat/Proof2RCoordSubsamplelast.dta",sep=""))
#####To compute some descriptive statistics In Table D.6
NAPPstat<-subset(NAPP,HHNUMhead %in% NAPPest$HHNUMhead,select=c(civil, social, HHNUMhead, objectid_1,Buffer400))
#Number of individuals per street point
NAPPsummary <- summaryBy(Buffer400~civil+social+objectid_1, data=NAPPstat, FUN=c(sum), na.rm=TRUE, keep.names=T)
sapply(list(mean,median,sd,min,max),function(fun,x) fun(NAPPsummary$Buffer400))
#Number of streets points per parish
NAPPsummary$Buffer400 <- 1
NAPPsummary <- summaryBy(Buffer400~civil+social, data=NAPPsummary, FUN=c(sum), na.rm=TRUE, keep.names=T)
sapply(list(mean,median,sd,min,max),function(fun,x) fun(NAPPsummary$Buffer400))
#Number of parishes per BW
NAPPsummary$Buffer400 <- 1
NAPPsummary <- summaryBy(Buffer400~civil, data=NAPPsummary, FUN=c(sum), na.rm=TRUE, keep.names=T)
sapply(list(mean,median,sd,min,max),function(fun,x) fun(NAPPsummary$Buffer400))
#Number of individuals per parish
NAPPsummary <- summaryBy(Buffer400~civil+social, data=NAPPstat, FUN=c(sum), na.rm=TRUE, keep.names=T)
sapply(list(mean,median,sd,min,max),function(fun,x) fun(NAPPsummary$Buffer400))
#Area of a parishes in mts2
descmap <- mapest[mapest@data$IDSocial %in% NAPPest$social,]
sapply(list(mean,median,sd,min,max),function(fun,x) fun(descmap@data$Shape_Area))
##############

##############################################
## Robustness: Restricting sample
if(exercise=="nmovers") {
  #0.a Robustness: to keep only non movers (see 0.b below) ######### nmovers
  NAPPest <- subset(NAPPest,stayerc==1) ########
  NAPPest <- NAPPest[(ddply(NAPPest,.(social),transform, Nn =length(social))$Nn>=30),]
} else {
  if(exercise=="age1530") {
    #0.a Robustness: to keep only age15-30 (see 0.b below) ######### age1530
    NAPPest <- subset(NAPPest,(AGE>=15)&(AGE<=30)) ########
    NAPPest <- NAPPest[(ddply(NAPPest,.(social),transform, Nn =length(social))$Nn>=30),]    
  } else {
    if(exercise=="age1545") {
      #0.a Robustness: to keep only age15-45 (see 0.b below) ######### age1545
      NAPPest <- subset(NAPPest,(AGE>=15)&(AGE<=45)) ########
      NAPPest <- NAPPest[(ddply(NAPPest,.(social),transform, Nn =length(social))$Nn>=30),]    
    } else {
      if(exercise=="h1") {
        #0.a Robustness: to keep only first half from each parish (see 0.b below) ######### h1 to perform the split-jackknife
        NAPPest <- NAPPest[NAPPest$HHNUMhead<=ddply(NAPPest,.(social),transform, half = median(HHNUMhead))$half,]
      } else {
        if(exercise=="h2") {
          #0.a Robustness: to keep only second half from each parish (see 0.b below) ######### h2 to perform the split-jackknife
          NAPPest <- NAPPest[NAPPest$HHNUMhead>ddply(NAPPest,.(social),transform, half = median(HHNUMhead))$half,]
        }
      }
    }
  }
}

##############################################

class <- factor(NAPPest$classnew) #defining factors based on class 
class.m <- model.matrix(~ -1 + class) #creating columnsdummy vars for each class category
NAPPest <- cbind(NAPPest,class.m)
colnames(NAPPest)[colnames(NAPPest) %in% c("civil","social","HHNUMhead","classnew")] <- c("IDCivil","IDSocial","ID","choice")


PooledNAPPID <- NAPPest[(colnames(NAPPest) %in% c("IDCivil","IDSocial","ID","choice"))]
#Subset of individual characteristics
NAPPX <- NAPPest[!(colnames(NAPPest) %in% c("IDCivil","ID","choice"))]
NAPPXlist <- split(NAPPX, list(NAPPX$IDSocial))
NAPPXlist <- lapply(NAPPXlist,function(x) x <- as.matrix(x[!(colnames(x) %in% c("IDSocial"))])) #to convert data.frame to matrix

#Geographic variables
##00.b To keep only those with occupations and employed, benchmark exercise, 
NAPPgeo <- subset(NAPP,!(civil %in% c.n.2s) & !(social %in% p.n.N) & !(classnew == 0 | classnew==2) ,select=c(longitude , latitude, social))
NAPPgeo <- NAPPgeo[(ddply(NAPPgeo,.(social),transform, Nn =length(social))$Nn>=30),]
##############################################
## Robustness: Geographic variables
if(exercise=="nmovers") {
  #0.b Robustness: to keep only non movers with occupations and employed (see #0.c below) ######### nmovers
  NAPPgeo <- subset(NAPP,!(civil %in% c.n.2s) & !(social %in% p.n.N) & stayerc==1 & !(classnew == 0 | classnew==2),select=c(longitude , latitude, social)) ########
  NAPPgeo <- NAPPgeo[(ddply(NAPPgeo,.(social),transform, Nn =length(social))$Nn>=30),]
} else {
  if(exercise=="age1530") {
    #0.b Robustness: to keep only age15-30 with occupations and employed (see #0.c below) ######### age1530
    NAPPgeo <- subset(NAPP,!(civil %in% c.n.2s) & !(social %in% p.n.N) & (AGE>=15 & AGE<=30) & !(classnew == 0 | classnew==2),select=c(longitude , latitude, social)) ########
    NAPPgeo <- NAPPgeo[(ddply(NAPPgeo,.(social),transform, Nn =length(social))$Nn>=30),]
  } else {
    if(exercise=="age1545") {
      #0.b Robustness: to keep only age15-45 with occupations and employed (see #0.c below) ######### age1545
      NAPPgeo <- subset(NAPP,!(civil %in% c.n.2s) & !(social %in% p.n.N) & (AGE>=15 & AGE<=45) & !(classnew == 0 | classnew==2),select=c(longitude , latitude, social)) ########
      NAPPgeo <- NAPPgeo[(ddply(NAPPgeo,.(social),transform, Nn =length(social))$Nn>=30),]
    } else {
      if(exercise=="h1") {
        #0.b Robustness: to keep only first half from each parish (see #0.c below) ######### h1 to perform the split-jackknife
        NAPPgeo <- subset(NAPP,!(civil %in% c.n.2s) & !(social %in% p.n.N) & (HHNUMhead %in% NAPPest$ID) & !(classnew == 0 | classnew==2),select=c(longitude , latitude, social)) ########
      } else {
        if(exercise=="h2") {
          #0.b Robustness: to keep only first half from each parish (see #0.c below) ######### h2 to perform the split-jackknife
          NAPPgeo <- subset(NAPP,!(civil %in% c.n.2s) & !(social %in% p.n.N) & (HHNUMhead %in% NAPPest$ID) & !(classnew == 0 | classnew==2),select=c(longitude , latitude, social)) ########
        }
      }
    }
  }
}
##############################################

colnames(NAPPgeo)[colnames(NAPPgeo) %in% c("longitude","latitude","social")] <- c("long","lati","IDSocial")
coordslistNAPP  <- split(NAPPgeo, list(NAPPgeo$IDSocial))
coordslistNAPP <- lapply(coordslistNAPP, function(x) x <- cbind(x[["long"]]+rnorm(nrow(x),0,0.1),x[["lati"]]+rnorm(nrow(x),0,0.1)) ) #adding a minor disturbance to original coordinates so sdep functions work fine similar to #jitterDupCoords(, max=0.01)

#########
# Save, ONLY for benchmark exercise #00.a, for robustness continuous W
if(exercise=="w50list") {
  save(PooledNAPPID, file=paste(root,"NAPP/ReStat/PooledNAPPIDnewwcontinuous.RData",sep="" )) 
  save(NAPPXlist, file=paste(root,"NAPP/ReStat/NAPPXlistnewwcontinuous.RData",sep="" )) 
  save(coordslistNAPP, file=paste(root,"NAPP/ReStat/coordslistNAPPnewwcontinuous.RData",sep="" )) 
}
#########

coordslist <- coordslistNAPP


##############################################
#1. Robustness weighting matrices 
##00.b w within 50 mts w50list benchmark exercise

if(exercise %in% c("w50list","nmovers", "age1530", "age1545", "h1","h2")) {
  d50neighbours <- lapply(coordslistNAPP, within50nei ) 
  w50list<-lapply(d50neighbours, function(x) nb2listw(x, glist=NULL, style="W", zero.policy=TRUE)) # zero.policy=NULL if TRUE permit the weights list to be formed with zero-length weights vectors. Style  can take values "W", "B", "C", "U", "minmax" and "S"
  dneighbours <- d50neighbours
  wlist <- w50list #if we want neighbours within 50mts
} else {
  if(exercise == "atleast1") {
    ##0.b Robustness w within 0 mts (see #0.c below)  Lists of neighbours per parish (Original weights: at least one neighbor) atleast1 which is equivalent to d=0
    dneighbours <- lapply(coordslistNAPP, atleast1nei ) 
    #Adjancecy matrix row normalised (style="W")
    wlist<-lapply(dneighbours, function(x) nb2listw(x, glist=NULL, style="W", zero.policy=TRUE)) # zero.policy=NULL if TRUE permit the weights list to be formed with zero-length weights vectors. Style  can take values "W", "B", "C", "U", "minmax" and "S"
  } else {
    if(exercise == "atmost10") { 
      ##0.b Robustness w 10 neighbours (see #0.c below) get at most 10 neighbours atmost10
      kneighbours <- lapply(coordslistNAPP, atmost10nei ) 
      wklist<-lapply(kneighbours, function(x) nb2listw(x, glist=NULL, style="W", zero.policy=TRUE)) # zero.policy=NULL if TRUE permit the weights list to be formed with zero-length weights vectors. Style  can take values "W", "B", "C", "U", "minmax" and "S"
      dneighbours <- kneighbours
      wlist <- wklist 
    } else {
      if(exercise == "w100list") { 
        ##0.b Robustness  w within 100 mts (see #0.c below) w100list
        d100neighbours <- lapply(coordslistNAPP, within100nei ) 
        w100list<-lapply(d100neighbours, function(x) nb2listw(x, glist=NULL, style="W", zero.policy=TRUE)) # zero.policy=NULL if TRUE permit the weights list to be formed with zero-length weights vectors. Style  can take values "W", "B", "C", "U", "minmax" and "S"
        dneighbours <- d100neighbours
        wlist <- w100list #if we want neighbours within 100mts
      } else {
        if(exercise == "within50All") { 
          ##0.b Robustness  w within 100 mts (see #0.c below) w100list
          d50allneighbours <- lapply(coordslistNAPP, within50Allnei) 
          w50alllist<-lapply(d50allneighbours, function(x) nb2listw(x, glist=NULL, style="W", zero.policy=TRUE)) # zero.policy=NULL if TRUE permit the weights list to be formed with zero-length weights vectors. Style  can take values "W", "B", "C", "U", "minmax" and "S"
          dneighbours <- d50allneighbours
          wlist <- w50alllist #if we want neighbours within 100mts
        }
      }
    }
  }
}
##############################################

#Combining lists
NAPPXwlist <- mapply(weighting1,NAPPXlist,wlist) 
NAPPXlist <- lapply(NAPPXlist,function(x) x <- as.data.frame(x)) #to convert  matrix to data.frame
pooledNAPPXlist <- rbindlist(NAPPXlist)
pooledNAPPXlist <- cbind(PooledNAPPID, pooledNAPPXlist)

NAPPXwlist <- lapply(NAPPXwlist,function(x) x <- as.data.frame(x)) #to convert  matrix to data.frame
pooledNAPPXwlist <- rbindlist(NAPPXwlist)
attr(pooledNAPPXwlist,"class") <- "data.frame"


pooleddatalist <- cbind(pooledNAPPXlist[ !(colnames(pooledNAPPXlist) %in% paste("class",c(min(pooledNAPPXlist$choice):max(pooledNAPPXlist$choice)),sep="")) ],pooledNAPPXwlist)
colnames(pooleddatalist)[colnames(pooleddatalist) %in% paste("class",c(min(pooledNAPPXlist$choice):max(pooledNAPPXlist$choice)),"w",sep="") ] <- paste("sw.",c(min(pooledNAPPXlist$choice):max(pooledNAPPXlist$choice)),sep="")
datalist <- split(pooleddatalist, list(pooleddatalist$IDSocial))

#Poduce the long format and pooled data
datalistlong <- lapply(datalist,function(x) tolong(x,"choice",c("sw.")))
pooleddata <- rbindlist(datalistlong)

#Produce long data for homogenoeus beliefs
pooleddatalisthomo <- pooleddatalist
pooleddatalisthomo[grep("w", names(pooleddatalisthomo),value=TRUE)] <-  ddply(pooledNAPPXlist, .(IDSocial), sapply, FUN = 'witaver')[c("SEX","AGE","married",  "nchild",   "nservant", "stayerp",  "stayerc",   "class0",   "class1",   "class2", "class3", "class4", "class5")]

pooleddatahomo <-  tolong(pooleddatalisthomo,"choice",c("sw."))

###Some maps
for (civils in c(30,14)) {
  uniqueIDsocials <- unique(pooleddatalist[pooleddatalist$IDCivil==civils,]$IDSocial)
  linesubIDCivil <- line[line@data$IDSocial %in% uniqueIDsocials,]
  listordIDCivil <- c()
  for(i in c(1:length(datalist))) {
    if (unique(datalist[[i]]$IDSocial) %in% uniqueIDsocials) listordIDCivil <- c(listordIDCivil,i)
  }
  
  cairo_ps(paste("MapNeigh_IDCivilnew",civils,".eps",sep=""), width=7, height=7) #to keep transparency from the intervals
  print(plot(linesubIDCivil,lwd=2,col="gray"))
  for(i in listordIDCivil) {
    print(points( coordslist[[i]] + cbind(rnorm(nrow(coordslist[[i]]),5,2),rnorm(nrow(coordslist[[i]]),5,2)), pch=20, col=i,cex=.7))
#    print(plot(dneighbours[[i]],coordslist[[i]] + cbind(rnorm(nrow(coordslist[[i]]),5,2),rnorm(nrow(coordslist[[i]]),5,2)), col="red", pch=20))
  }
  title(paste("IDCivil: ",civils,sep=""))
  dev.off()
}


#To save data paste(root,"Mola Data/",sep="")
#########################################
##DEPENDING ON THE EXERCISE: BENCHMARK OR ROBUSTNESS, CHANGE THE NAME ENDING OF THE FOLLOWING SAVED DATASETS TO ONE OF THE FOLLOWING:
## w50list atleast1 w100list age1530 age1545 h1 h2 within50All.
## Therefore, if one is running the benchmark specifiction, the first line should be:
## save(pooleddata, file=paste(root,"NAPP/ReStat/pooleddataNAPPneww50list.RData",sep="" ))
#########################################
if(exercise=="w50list"){
  save(pooleddata, file=paste(root,"NAPP/ReStat/pooleddataNAPPnew.RData",sep="" )) #Data per parish in long format (sw) pooled together
  save(pooleddatahomo, file=paste(root,"NAPP/ReStat/pooleddataNAPPhomonew.RData",sep="" )) #Data per parish in long format (sw) pooled together for homogeneous beliefs
  save(pooleddatalist, file=paste(root,"NAPP/ReStat/pooleddatalistNAPPnew.RData",sep="" )) #Data per parish in wide format (sw.1,sw.2,...) pooled together
  save(pooleddatalisthomo, file=paste(root,"NAPP/ReStat/pooleddatalistNAPPhomonew.RData",sep="" )) #Data per parish in wide format (sw.1,sw.2,...) pooled together for the homogeneous case
  save(datalist, file=paste(root,"NAPP/ReStat/datalistNAPPnew.RData",sep="" )) #Lists of data per parish in wide format (sw.1,sw.2,...)
  save(coordslist, file=paste(root,"NAPP/ReStat/coordslistNAPPnew.RData",sep="" )) #Lists of coordinates per parish
  save(dneighbours, file=paste(root,"NAPP/ReStat/dneighboursNAPPnew.RData",sep="")) #Lists of neigbours identities per parish
  save(wlist, file=paste(root,"NAPP/ReStat/wlistNAPPnew.RData",sep="")) #Sparse Lists of neighbours per parish
}

save(pooleddata, file=paste(root,"NAPP/ReStat/pooleddataNAPPnew",exercise,".RData",sep="" )) #Data per parish in long format (sw) pooled together
save(pooleddatahomo, file=paste(root,"NAPP/ReStat/pooleddataNAPPhomonew",exercise,".RData",sep="" )) #Data per parish in long format (sw) pooled together for homogeneous beliefs
save(pooleddatalist, file=paste(root,"NAPP/ReStat/pooleddatalistNAPPnew",exercise,".RData",sep="" )) #Data per parish in wide format (sw.1,sw.2,...) pooled together
save(pooleddatalisthomo, file=paste(root,"NAPP/ReStat/pooleddatalistNAPPhomonew",exercise,".RData",sep="" )) #Data per parish in wide format (sw.1,sw.2,...) pooled together for the homogeneous case
save(datalist, file=paste(root,"NAPP/ReStat/datalistNAPPnew",exercise,".RData",sep="" )) #Lists of data per parish in wide format (sw.1,sw.2,...)
save(coordslist, file=paste(root,"NAPP/ReStat/coordslistNAPPnew",exercise,".RData",sep="" )) #Lists of coordinates per parish
save(dneighbours, file=paste(root,"NAPP/ReStat/dneighboursNAPPnew",exercise,".RData",sep="")) #Lists of neigbours identities per parish
save(wlist, file=paste(root,"NAPP/ReStat/wlistNAPPnew",exercise,".RData",sep="")) #Sparse Lists of neighbours per parish
  
sink()

#########################################################################
###2. ESTIMATION######
########################################################################
 rm(list=ls(all=TRUE))
##DEPENDING ON THE EXERCISE: BENCHMARK OR ROBUSTNESS, CHANGE THE NAME OF ALL DATASETS, that we either save of load below, TO ONE OF THE FOLLOWING:
## w50list atleast1 w100list nmovers age1530 w50list atmost10 h1 h2 within50All.
exercise <- "w50list" #"within50All" 
 set.seed(12345)
#root <- "C:/Users/ja.guerra/Dropbox/NetworkParish2016/Occupational_Choice/" #root <- "C:/HPC/"
root <- "C:/Users/ja.guerra/Dropbox/NetworkParish2016/Occupational_Choice/Submission/Final_ReStat/Codes/" #root <- "C:/HPC/"
setwd(paste(root,"Results/ReStat/",sep=""))
codes <- "C:/Users/ja.guerra/Dropbox/NetworkParish2016/Occupational_Choice/Submission/Final_ReStat/Codes/C_ANALYSIS/" #codes <- "C:/HPC/CODE/"
source(paste(codes,"Mlogit_NAPPHeteroFunctionsv2.R",sep=""))

#Load Data
if(exercise=="w50list"){
  load(file=paste(root,"NAPP/ReStat/pooleddataNAPPnew.RData",sep="" )) # pooleddata, Data per parish in long format (sw) pooled together
  load(file=paste(root,"NAPP/ReStat/pooleddataNAPPhomonew.RData",sep="" )) # pooleddatahomo, Data per parish in long format (sw) pooled together for hmogenous
  load(file=paste(root,"NAPP/ReStat/pooleddatalistNAPPnew.RData",sep="" )) #pooleddatalist, Data per parish in wide format (sw.1,sw.2,...) pooled together
  load(file=paste(root,"NAPP/ReStat/pooleddatalistNAPPhomonew.RData",sep="" )) #pooleddatalisthomo, Data per parish in wide format (sw.1,sw.2,...) pooled together
  load(file=paste(root,"NAPP/ReStat/datalistNAPPnew.RData",sep="" )) #datalist, Lists of data per parish in wide format (sw.1,sw.2,...)
  load(file=paste(root,"NAPP/ReStat/coordslistNAPPnew.RData",sep="" )) #coordslist, Lists of coordinates per parish
  load(file=paste(root,"NAPP/ReStat/dneighboursNAPPnew.RData",sep="")) #dneighbours, Lists of neigbours identities per parish
  load(file=paste(root,"NAPP/ReStat/wlistNAPPnew.RData",sep="")) #wlist, Sparse Lists of neighbours per parish
} else {
  load(file=paste(root,"NAPP/ReStat/pooleddataNAPPnew",exercise,".RData",sep="" )) # pooleddata, Data per parish in long format (sw) pooled together
  load(file=paste(root,"NAPP/ReStat/pooleddataNAPPhomonew",exercise,".RData",sep="" )) # pooleddatahomo, Data per parish in long format (sw) pooled together for hmogenous
  load(file=paste(root,"NAPP/ReStat/pooleddatalistNAPPnew",exercise,".RData",sep="" )) #pooleddatalist, Data per parish in wide format (sw.1,sw.2,...) pooled together
  load(file=paste(root,"NAPP/ReStat/pooleddatalistNAPPhomonew",exercise,".RData",sep="" )) #pooleddatalisthomo, Data per parish in wide format (sw.1,sw.2,...) pooled together
  load(file=paste(root,"NAPP/ReStat/datalistNAPPnew",exercise,".RData",sep="" )) #datalist, Lists of data per parish in wide format (sw.1,sw.2,...)
  load(file=paste(root,"NAPP/ReStat/coordslistNAPPnew",exercise,".RData",sep="" )) #coordslist, Lists of coordinates per parish
  load(file=paste(root,"NAPP/ReStat/dneighboursNAPPnew",exercise,".RData",sep="")) #dneighbours, Lists of neigbours identities per parish
  load(file=paste(root,"NAPP/ReStat/wlistNAPPnew",exercise,".RData",sep="")) #wlist, Sparse Lists of neighbours per parish
}

###########################
###Homogeneous Beliefs (i.e. group interactions, no correlated effects and fixed point beliefs subroutine)
###########################
AM <- 0 #if using Aguirregabiria and Mira==1, 0 if using Kasahara and Shimotsu


if( AM==1 ){  rootsave <- paste(root,"Results/ReStat/",sep="") } else   {rootsave <- paste(root,"Results/ReStat/KS/",sep="")}
sink(paste(rootsave,"NAPPEstimationHomonew.txt",sep=""))
print("Homogenous beliefs: group interations, no correlated effects and fixed point subroutine")

#define which variables we include in the estimation
xvars <- c("AGE","married",  "nchild",   "nservant", "stayerp") #, "stayerc" "SEX"
wvar <- "sw"

## Naive Estimation Homogeneous
###################
print("Naive Estimation")
options(max.print=1000000)
gc()
#1. x_i
fformula <-mFormula(as.formula(paste("choice ~ 1 | ", paste(xvars, collapse= "+"),"|", wvar ,sep="")))
ml.w <- mnlogit(fformula, pooleddatahomo,"alt")
if(exercise=="w50list"){
  save(ml.w,file=paste("Mlwhomo.RData",sep=""))
} else {
  save(ml.w,file=paste("Mlwhomo",exercise,".RData",sep=""))
}
print("w, x_i")
print(summary(ml.w))
#2. x_i, x_p
##w
fformula <-mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(xvars, collapse= "+"),"+",paste(xvars, "w",collapse= "+",sep="")),"|", wvar ,sep="")))
ml.om.w <- mnlogit(fformula, pooleddatahomo,"alt")
if(exercise=="w50list"){
  save(ml.om.w,file=paste("Mlomwhomo.RData",sep=""))
} else {
  save(ml.w,file=paste("Mlomwhomo",exercise,".RData",sep=""))
}
print("w, x_i x_p")
print(summary(ml.om.w))
#3. x_i, x_p, t_b
##w
fformula <-mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(xvars, collapse= "+"),"+",paste(xvars, "w",collapse= "+",sep=""),"+","factor(IDCivil)"),"|", wvar ,sep="")))
ml.m.w <- mnlogit(fformula, pooleddatahomo,"alt")
if(exercise=="w50list"){
  save(ml.m.w,file=paste("Mlmwhomo.RData",sep=""))
} else {
  save(ml.w,file=paste("Mlmwhomo",exercise,".RData",sep=""))
}

print("w, x_i x_p t_b")
print(summary(ml.m.w))


## Nested Pseudo likelihood Estimation Homogeneous
###################

#define which variables we include in the estimation and which formula we use
xvars <- c("AGE","married",  "nchild",   "nservant", "stayerp") #, "stayerc" "SEX",
wvar <- "sw"
fformula <-mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(xvars, collapse= "+"),"+",paste(xvars, "w",collapse= "+",sep=""),"+","factor(IDCivil)"),"|", wvar ,sep="")))

#Outer Iteration: Maximum likelihood
alter <- sort(unique(pooleddatahomo$alt))
L <- length(alter)
it0 <- 1
#maxiter0 <- 600
#zero <- 1E-3
maxiter0 <- 50
zero0 <- 1E-8
Jeit <- matrix(rep(0,maxiter0*L),nrow=maxiter0) #Keep track of the coefficients
ndJeit <- L

#To keep original pooleddata to implement Weidner (2012) method
pooleddatahomo0 <- pooleddatahomo
pooleddatalisthomo0 <- pooleddatalisthomo

#to define outer iteration of expectations
ndexpectito <- L
ndexpectitoall <- matrix(0,nrow=maxiter0) #Keep track of the tolerance level
expeco <- subset(pooleddatalisthomo,select=paste(wvar,".",alter,sep=""))

while (it0<=maxiter0 & ndexpectito > zero0) {
  print(paste("ML iteration: ",it0, sep=""))
  #Estimating
  if(it0==1) {
    if(exercise=="w50list"){
      load("Mlmwhomo.RData")
    } else {
      load(file=paste("Mlmwhomo",exercise,".RData",sep=""))
    }
     
      ml.m.w0 <- ml.m.w
    } else  ml.m.w <- mnlogit(fformula, pooleddatahomo, "alt")

  #Define starting conditions, if we want to give them staring values based on theta
  #if (it0==1) ml.m.w$coeff[paste(wvar,":",alter,sep="")] <- runif(L,2,4)

  #Keeping coeff
  Jeit[it0,] <- summary(ml.m.w)$coeff[paste(wvar,":",alter,sep="")]
  print(Jeit[it0,])
  ndexpectitoall[it0,] <- ndexpectito 
  print(ndexpectitoall[it0,])
  #Inner iteration, fixed point in beliefs
  it <- 1
  zero <- 1E-8
  #maxiter <- 1 #if iterating a la Aguirregabiria and Mira (2007) or a la Weidner (2012)
  maxiter <- 100 #If iterating with the fixed point a la Lee Lin Liu (2014)
  expectit <- matrix(rep(0,maxiter*L),nrow=maxiter)
  ndexpectit <- L
  #subsetting sample to
  expec <- subset(pooleddatalisthomo,select=paste(wvar,".",alter,sep=""))
  #iteration of beliefs
  while (it<=maxiter & ndexpectit > zero) {
    print(paste("Inner Iteration ",it,sep=""))
    #keepint track of expectations
    expectit[it,] <- apply(expec,2,mean)

    alpha <- .1 #Kasahara and Shimotsu (2012)
    #Predict probabilities on the pooled data list
    tempprob <- predict(ml.m.w,pooleddatahomo,probability=TRUE)
    #Update Fixed point
    for(aa in sort(alter, decreasing=TRUE) ){
      ap <- which(alter==aa)
      if (aa != min(alter)) { #to guarantee KS probabilities are within [0,1] bounds
        if (AM==1) { 
          pooleddatalisthomo[,paste(wvar,".",aa,sep="")] <- ml.m.w$probabilities[,ap] #Aguirregabiria and Mira (2007)
        } else {
          pooleddatalisthomo[,paste(wvar,".",aa,sep="")] <- ((tempprob[,ap])^alpha)*((ml.m.w0$probabilities[,ap])^(1-alpha)) #Lee and Liu (2014)
        }
      } else {
        if (AM==1) { 
          pooleddatalisthomo[,paste(wvar,".",aa,sep="")] <- 1-apply(ml.m.w$probabilities[,paste(alter[alter!=min(alter)],sep="")],1,sum) 
        } else {
          pooleddatalisthomo[,paste(wvar,".",aa,sep="")] <- 1-apply(((tempprob[,paste(alter[alter!=min(alter)],sep="")])^alpha)*((ml.m.w0$probabilities[,paste(alter[alter!=min(alter)],sep="")])^(1-alpha)),1,sum) #Kasahara and Shimotsu (2012)
        }
      }
    }

    #``spatially'' weighting
    pooleddatalisthomo[paste(wvar,".",alter,sep="")] <-  ddply(pooleddatalisthomo, .(IDSocial), sapply, FUN = 'witaver')[paste(wvar,".",alter,sep="")]

    #updating expectations
    expec1 <- subset(pooleddatalisthomo,select=paste(wvar,".",alter,sep=""))

    #Updating pooleddata (long format) #Another option pooleddatahomo <-  tolong(pooleddatalisthomo,"choice",c(paste(wvar,".",sep="")))
    for(aa in alter){
      pooleddatahomo[pooleddatahomo$alt==aa,][[paste(wvar,sep="")]] <- pooleddatalisthomo[[paste(wvar,".",aa,sep="")]]
    }

    if (it>=2) ndexpectit <- max(as.matrix(abs(expec1-expec))) # the maximum of all matrix entries #ndexpectit <- norm(as.matrix(expec1-expec),type="I")  #element by element sum(distr(expec,expec1))
    print(paste("Max abs diff inner: ",ndexpectit,sep=""))
    expec <- expec1
    it <- it+1
  }
  if (it<=maxiter & ndexpectit < zero) print(paste("Fixed point in beliefs convergence reached at iter: ",it-1,sep="")) else print(paste("Fixed point in beliefs one step update it: ",it,sep="")) #print(paste("Fixed point in beliefs FAILED to converge after it: ",it,sep=""))
  
  #updating expectations
  expec1o <- subset(pooleddatalisthomo,select=paste(wvar,".",alter,sep=""))
  
  #if (it0>=2) ndJeit <- sum(abs(Jeit[it0,]-Jeit[it0-1,]))
  if (it0>=2) {
    #ndexpectito <- norm(as.matrix(expeco-expec1o),type="I")
    ndexpectito <- max(as.matrix(abs(expec1o-expeco))) # the maximum of all matrix entries
    }#element by element ndexpectito <- sum(distr(expeco,expec1o))
  print(paste("Diff Abs max ",ndexpectito,sep=""))
  expeco <- expec1o
  #To keep the previous probabilities to perform Kasahara and Shimotsu
  ml.m.w0 <- ml.m.w
  it0 <- it0+1
}
print(paste("Outer ML Homogeneous iter converged at iter: ",it0-1," using weights: ",wvar,sep=""))
Jeit[it0-1,] <- summary(ml.m.w)$coeff[paste(wvar,":",alter,sep="")]
print(Jeit[it0-1,])
ndexpectitoall[it0-1,] <- ndexpectito 
print(ndexpectitoall[it0-1,])
#Saving last iteration results
if(exercise=="w50list"){
  save(ml.m.w,file=paste(rootsave,"PMLmwhomofinal.RData",sep=""))
} else {
  save(ml.m.w,file=paste(rootsave,"PMLmwhomofinal",exercise,".RData",sep=""))
}

print(summary(ml.m.w))

#Plot convergence in coefficientes
#coefficients
colnames(Jeit)<-c(paste("J.",alter,sep=""))
dataJeit<-as.data.frame(Jeit[c(1:(it0-1)),])
dataJeit$iter <- c(1:nrow(dataJeit))
meltdataJeit<-melt(dataJeit,id="iter")
if(exercise=="w50list"){
  cairo_ps(paste(rootsave,"JePMLmwhomofinal.eps",sep=""), width=7, height=7) #to keep transparency from the intervals
} else {
  cairo_ps(paste(rootsave,"JePMLmwhomofinal",exercise,".eps",sep=""), width=7, height=7) #to keep transparency from the intervals
}
print(ggplot(meltdataJeit,aes(x=iter,y=value,colour=variable,group=variable)) + geom_line()+ ylab(expression(paste(J[y]^{t})))+xlab("t"))
dev.off()
if(exercise=="w50list"){
  save(dataJeit,file=paste(rootsave,"JePMLmwhomofinal.RData",sep=""))
} else {
  save(dataJeit,file=paste(rootsave,"JePMLmwhomofinal",exercise,".RData",sep=""))
}




#Plot convergence in Infinity Norm
#coefficients
colnames(ndexpectitoall)<-"AbsMax"
dataInftyNorm<-as.data.frame(ndexpectitoall[c(3:(it0-1))])
colnames(dataInftyNorm)<-"AbsMax"
dataInftyNorm$iter <- c(1:nrow(dataInftyNorm))
meltdataInftyNorm<-melt(dataInftyNorm,id="iter")
if(exercise=="w50list"){
  cairo_ps(paste(rootsave,"InftyNormMLmwhomofinal.eps",sep=""), width=7, height=7) #to keep transparency from the intervals
} else {
  cairo_ps(paste(rootsave,"InftyNormMLmwhomofinal",exercise,".eps",sep=""), width=7, height=7) #to keep transparency from the intervals
}
print(ggplot(meltdataInftyNorm,aes(x=iter,y=value,colour=variable,group=variable)) + geom_line()+ ylab(expression(InftyNorm))+xlab("t"))
dev.off()
if(exercise=="w50list"){
  save(dataInftyNorm,file=paste(rootsave,"InftyNormMLmwhomofinal.RData",sep=""))
} else {
  save(dataInftyNorm,file=paste(rootsave,"InftyNormMLmwhomofinal",exercise,".RData",sep=""))
}


#### Homogeneous without contextual effects
## Nested Pseudo likelihood Estimation Homogeneous
###################

#define which variables we include in the estimation and which formula we use
xvars <- c("AGE","married",  "nchild",   "nservant", "stayerp") #, "stayerc" "SEX",
wvar <- "sw"
#4. x_i, x_p, t_b
##w
fformulawodelta <-mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(xvars, collapse= "+"),"+","factor(IDCivil)"),"|", wvar ,sep="")))          
fformula <-fformulawodelta
ml.m.w <- mnlogit(fformula, pooleddatahomo,"alt")
if(exercise=="w50list"){
  save(ml.m.w,file=paste("Mlmwhomowodelta.RData",sep=""))
} else {
  save(ml.m.w,file=paste("Mlmwhomowodelta",exercise,".RData",sep=""))
}

print("w, x_i t_b")
print(summary(ml.m.w))

#Outer Iteration: Maximum likelihood
alter <- sort(unique(pooleddatahomo$alt))
L <- length(alter)
it0 <- 1
#maxiter0 <- 600
#zero <- 1E-3
maxiter0 <- 50
zero0 <- 1E-8
Jeit <- matrix(rep(0,maxiter0*L),nrow=maxiter0) #Keep track of the coefficients
ndJeit <- L

#To keep original pooleddata to implement Weidner (2012) method
pooleddatahomo0 <- pooleddatahomo
pooleddatalisthomo0 <- pooleddatalisthomo

#to define outer iteration of expectations
ndexpectito <- L
ndexpectitoall <- matrix(0,nrow=maxiter0) #Keep track of the tolerance level
expeco <- subset(pooleddatalisthomo,select=paste(wvar,".",alter,sep=""))

while (it0<=maxiter0 & ndexpectito > zero0) {
  print(paste("ML iteration: ",it0, sep=""))
  #Estimating
  if(it0==1) {
    if(exercise=="w50list"){
      load("Mlmwhomowodelta.RData")
    } else {
      load("Mlmwhomowodelta",exercise,".RData")
    }
    
    ml.m.w0 <- ml.m.w
  } else  ml.m.w <- mnlogit(fformula, pooleddatahomo, "alt")
  
  #Define starting conditions, if we want to give them staring values based on theta
  #if (it0==1) ml.m.w$coeff[paste(wvar,":",alter,sep="")] <- runif(L,2,4)
  
  #Keeping coeff
  Jeit[it0,] <- summary(ml.m.w)$coeff[paste(wvar,":",alter,sep="")]
  print(Jeit[it0,])
  ndexpectitoall[it0,] <- ndexpectito 
  print(ndexpectitoall[it0,])
  #Inner iteration, fixed point in beliefs
  it <- 1
  zero <- 1E-8
  #maxiter <- 1 #if iterating a la Aguirregabiria and Mira (2007) or a la Weidner (2012)
  maxiter <- 100 #If iterating with the fixed point a la Lee Lin Liu (2014)
  expectit <- matrix(rep(0,maxiter*L),nrow=maxiter)
  ndexpectit <- L
  #subsetting sample to
  expec <- subset(pooleddatalisthomo,select=paste(wvar,".",alter,sep=""))
  #iteration of beliefs
  while (it<=maxiter & ndexpectit > zero) {
    print(paste("Inner Iteration ",it,sep=""))
    #keepint track of expectations
    expectit[it,] <- apply(expec,2,mean)
    
    alpha <- .1 #Kasahara and Shimotsu (2012)
    #Predict probabilities on the pooled data list
    tempprob <- predict(ml.m.w,pooleddatahomo,probability=TRUE)
    #Update Fixed point
    for(aa in sort(alter, decreasing=TRUE) ){
      ap <- which(alter==aa)
      if (aa != min(alter)) { #to guarantee KS probabilities are within [0,1] bounds
        if (AM==1) { 
          pooleddatalisthomo[,paste(wvar,".",aa,sep="")] <- ml.m.w$probabilities[,ap] #Aguirregabiria and Mira (2007)
        } else {
          #pooleddatalisthomo[,paste(wvar,".",aa,sep="")] <- ((ml.m.w$probabilities[,ap])^alpha)*((ml.m.w0$probabilities[,ap])^(1-alpha)) #Kasahara and Shimotsu (2012)
          pooleddatalisthomo[,paste(wvar,".",aa,sep="")] <- ((tempprob[,ap])^alpha)*((ml.m.w0$probabilities[,ap])^(1-alpha)) #Lee and Liu (2014)
        }
      } else {
        if (AM==1) { 
          pooleddatalisthomo[,paste(wvar,".",aa,sep="")] <- 1-apply(ml.m.w$probabilities[,paste(alter[alter!=min(alter)],sep="")],1,sum) 
        } else {
          pooleddatalisthomo[,paste(wvar,".",aa,sep="")] <- 1-apply(((tempprob[,paste(alter[alter!=min(alter)],sep="")])^alpha)*((ml.m.w0$probabilities[,paste(alter[alter!=min(alter)],sep="")])^(1-alpha)),1,sum) #Kasahara and Shimotsu (2012)
        }
      }
    }
    
    #``spatially'' weighting
    pooleddatalisthomo[paste(wvar,".",alter,sep="")] <-  ddply(pooleddatalisthomo, .(IDSocial), sapply, FUN = 'witaver')[paste(wvar,".",alter,sep="")]
    
    #updating expectations
    expec1 <- subset(pooleddatalisthomo,select=paste(wvar,".",alter,sep=""))
    
    #Updating pooleddata (long format) #Another option pooleddatahomo <-  tolong(pooleddatalisthomo,"choice",c(paste(wvar,".",sep="")))
    for(aa in alter){
      pooleddatahomo[pooleddatahomo$alt==aa,][[paste(wvar,sep="")]] <- pooleddatalisthomo[[paste(wvar,".",aa,sep="")]]
    }
    
    if (it>=2) ndexpectit <- max(as.matrix(abs(expec1-expec))) # the maximum of all matrix entries #ndexpectit <- norm(as.matrix(expec1-expec),type="I")  #element by element sum(distr(expec,expec1))
    print(paste("Max abs diff inner: ",ndexpectit,sep=""))
    expec <- expec1
    it <- it+1
  }
  if (it<=maxiter & ndexpectit < zero) print(paste("Fixed point in beliefs convergence reached at iter: ",it-1,sep="")) else print(paste("Fixed point in beliefs one step update it: ",it,sep="")) #print(paste("Fixed point in beliefs FAILED to converge after it: ",it,sep=""))
  
  #updating expectations
  expec1o <- subset(pooleddatalisthomo,select=paste(wvar,".",alter,sep=""))
  
  if (it0>=2) {
    #ndexpectito <- norm(as.matrix(expeco-expec1o),type="I")
    ndexpectito <- max(as.matrix(abs(expec1o-expeco))) # the maximum of all matrix entries
  }#element by element ndexpectito <- sum(distr(expeco,expec1o))
  print(paste("Diff Abs max ",ndexpectito,sep=""))
  expeco <- expec1o
  #To keep the previous probabilities to perform Kasahara and Shimotsu
  ml.m.w0 <- ml.m.w
  it0 <- it0+1
}
print(paste("Outer ML Homogeneous iter converged at iter: ",it0-1," using weights: ",wvar,sep=""))
Jeit[it0-1,] <- summary(ml.m.w)$coeff[paste(wvar,":",alter,sep="")]
print(Jeit[it0-1,])
ndexpectitoall[it0-1,] <- ndexpectito 
print(ndexpectitoall[it0-1,])
#Saving last iteration results
if(exercise=="w50list"){
  save(ml.m.w,file=paste(rootsave,"PMLmwhomofinalwodelta.RData",sep=""))
} else {
  save(ml.m.w,file=paste(rootsave,"PMLmwhomofinalwodelta",exercise,".RData",sep=""))
}

print(summary(ml.m.w))

sink()


###########################
###Heterogenous Beliefs (i.e. ``network'' interactions, correlated effects and fixed point beliefs subroutine)
###########################
rm(list=ls(all=TRUE))
##DEPENDING ON THE EXERCISE: BENCHMARK OR ROBUSTNESS, CHANGE THE NAME OF ALL DATASETS, that we either save of load below, TO ONE OF THE FOLLOWING:
## w50list atleast1 w100list nmovers age1530 w50list atmost10 h1 h2 within50All .
exercise <- "w50list" #"within50All"
set.seed(12345)
#root <- "S:/NetworkParish/HPC/"
root <- "C:/Users/ja.guerra/Dropbox/NetworkParish2016/Occupational_Choice/Submission/Final_ReStat/Codes/" #root <- "C:/HPC/"
#root <- "C:/Users/ja.guerra/Dropbox/NetworkParish2016/Occupational_Choice/" #root <- "C:/HPC/"
setwd(paste(root,"Results/ReStat/",sep=""))
codes <- "C:/Users/ja.guerra/Dropbox/NetworkParish2016/Occupational_Choice/Submission/Final_ReStat/Codes/C_ANALYSIS/" #codes <- "C:/HPC/CODE/"
source(paste(codes,"Mlogit_NAPPHeteroFunctionsv2.R",sep=""))
AM <- 0 #if using Aguirregabiria and Mira==1, 0 if using Kasahara and Shimotsu
#if( AM==1 ){  rootsave <- paste(root,"Results/EEA/",sep="") } else   {rootsave <- paste(root,"Results/EEA/KS/",sep="")}
if( AM==1 ){  rootsave <- paste(root,"Results/ReStat/",sep="") } else   {rootsave <- paste(root,"Results/ReStat/KS/",sep="")}
#codes <- "C:/Users/ja.guerra/Dropbox/NetworkParish2016/Occupational_Choice/Codes/RCodes/EEACodes/" #codes <- "C:/HPC/CODE/"
#source(paste(codes,"Mlogit_NAPPHeteroFunctions.R",sep=""))

#Load Data
if(exercise=="w50list"){
  load(file=paste(root,"NAPP/ReStat/pooleddataNAPPnew.RData",sep="" )) # pooleddata, Data per parish in long format (sw) pooled together
  load(file=paste(root,"NAPP/ReStat/pooleddataNAPPhomonew.RData",sep="" )) # pooleddatahomo, Data per parish in long format (sw) pooled together for hmogenous
  load(file=paste(root,"NAPP/ReStat/pooleddatalistNAPPnew.RData",sep="" )) #pooleddatalist, Data per parish in wide format (sw.1,sw.2,...) pooled together
  load(file=paste(root,"NAPP/ReStat/pooleddatalistNAPPhomonew.RData",sep="" )) #pooleddatalisthomo, Data per parish in wide format (sw.1,sw.2,...) pooled together
  load(file=paste(root,"NAPP/ReStat/datalistNAPPnew.RData",sep="" )) #datalist, Lists of data per parish in wide format (sw.1,sw.2,...)
  load(file=paste(root,"NAPP/ReStat/coordslistNAPPnew.RData",sep="" )) #coordslist, Lists of coordinates per parish
  load(file=paste(root,"NAPP/ReStat/dneighboursNAPPnew.RData",sep="")) #dneighbours, Lists of neigbours identities per parish
  load(file=paste(root,"NAPP/ReStat/wlistNAPPnew.RData",sep="")) #wlist, Sparse Lists of neighbours per parish
} else {
  load(file=paste(root,"NAPP/ReStat/pooleddataNAPPnew",exercise,".RData",sep="" )) # pooleddata, Data per parish in long format (sw) pooled together
  load(file=paste(root,"NAPP/ReStat/pooleddataNAPPhomonew",exercise,".RData",sep="" )) # pooleddatahomo, Data per parish in long format (sw) pooled together for hmogenous
  load(file=paste(root,"NAPP/ReStat/pooleddatalistNAPPnew",exercise,".RData",sep="" )) #pooleddatalist, Data per parish in wide format (sw.1,sw.2,...) pooled together
  load(file=paste(root,"NAPP/ReStat/pooleddatalistNAPPhomonew",exercise,".RData",sep="" )) #pooleddatalisthomo, Data per parish in wide format (sw.1,sw.2,...) pooled together
  load(file=paste(root,"NAPP/ReStat/datalistNAPPnew",exercise,".RData",sep="" )) #datalist, Lists of data per parish in wide format (sw.1,sw.2,...)
  load(file=paste(root,"NAPP/ReStat/coordslistNAPPnew",exercise,".RData",sep="" )) #coordslist, Lists of coordinates per parish
  load(file=paste(root,"NAPP/ReStat/dneighboursNAPPnew",exercise,".RData",sep="")) #dneighbours, Lists of neigbours identities per parish
  load(file=paste(root,"NAPP/ReStat/wlistNAPPnew",exercise,".RData",sep="")) #wlist, Sparse Lists of neighbours per parish
}

sink(paste(rootsave,"NAPPEstimationHetenew.txt",sep=""))
options(max.print=1000000)
gc()
print("Heterogeneous  beliefs: ``network'' interactions, correlated effects and fixed point beliefs subroutine")

#define which variables we include in the estimation
xvars <- c("AGE","married",  "nchild",   "nservant", "stayerp") #, "stayerc" "SEX",
wvar <- "sw"

## Naive Estimation Heterogeneous
###################
print("Naive Estimation")
gc()
#1. x_i  
fformula <-mFormula(as.formula(paste("choice ~ 1 | ", paste(xvars, collapse= "+"),"|", wvar ,sep="")))          
ml.w <- mnlogit(fformula, pooleddata,"alt")
if(exercise=="w50list"){
  save(ml.w,file=paste("Mlwnew.RData",sep=""))
} else {
  save(ml.w,file=paste("Mlwnew",exercise,".RData",sep=""))
}
print("w, x_i")
print(summary(ml.w))
#2. x_i, x_p
##w
fformula <-mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(xvars, collapse= "+"),"+",paste(xvars, "w",collapse= "+",sep="")),"|", wvar ,sep="")))          
ml.om.w <- mnlogit(fformula, pooleddata,"alt")
if(exercise=="w50list"){
  save(ml.om.w,file=paste("Mlomwnew.RData",sep=""))
} else {
  save(ml.om.w,file=paste("Mlomwnew",exercise,".RData",sep=""))
}

print("w, x_i x_p")
print(summary(ml.om.w))
#3. x_i, x_p, t_b
##w
fformula <-mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(xvars, collapse= "+"),"+",paste(xvars, "w",collapse= "+",sep=""),"+","factor(IDCivil)"),"|", wvar ,sep="")))          
ml.m.w <- mnlogit(fformula, pooleddata,"alt")
if(exercise=="w50list"){
  save(ml.m.w,file=paste("Mlmwnew.RData",sep=""))
} else {
  save(ml.m.w,file=paste("Mlmwnew",exercise,".RData",sep=""))
}

print("w, x_i x_p t_b")
print(summary(ml.m.w))

#4. x_i, x_p, t_s
##w
fformula <-mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(xvars, collapse= "+"),"+",paste(xvars, "w",collapse= "+",sep=""),"+","factor(IDSocial)"),"|", wvar ,sep=""))) 
ml.mt.w <- mnlogit(fformula, pooleddata,"alt")
if(exercise=="w50list"){
  save(ml.mt.w,file=paste("Mlmtwnew.RData",sep=""))
} else {
  save(ml.mt.w,file=paste("Mlmtwnew",exercise,".RData",sep=""))
}

##0.d Robustness When robustness we have to change the ending of the file to accomodate one of the following
# #wexernei COULD BE atleast1 w100list nmovers age1530 w50list atmost10 within50All
#1. end of robust
print("w, x_i x_p t_p")
print(summary(ml.mt.w))
sink()

## Nested Pseudo likelihood Estimation Heterogeneous
###################

## Nested Pseudo likelihood Estimation Heterogeneous ***WITHOUT*** fe at borough level
#source(paste(codes,"Mlogit_NAPPHeterofformulav3.R",sep=""))

## Nested Pseudo likelihood Estimation Heterogeneous ***WITH*** fe at parish level
#source(paste(codes,"Mlogit_NAPPHeterofformula1v2.R",sep=""))

## Nested Pseudo likelihood Estimation Heterogeneous ***WITH*** fe at parish level
# It can be ran once you have change previous dataset for estimmation:
# pooleddataNAPPnew",wexernei,".RData",sep="" )) # pooleddata, Data per parish in long format (sw) pooled together
# pooleddataNAPPhomonew",wexernei,".RData",sep="" )) # pooleddatahomo, Data per parish in long format (sw) pooled together for hmogenous
# load(file=paste(root,"NAPP/pooleddatalistNAPPnew",wexernei,".RData",sep="" )) #pooleddatalist, Data per parish in wide format (sw.1,sw.2,...) pooled together
# load(file=paste(root,"NAPP/pooleddatalistNAPPhomonew",wexernei,".RData",sep="" )) #pooleddatalisthomo, Data per parish in wide format (sw.1,sw.2,...) pooled together
# load(file=paste(root,"NAPP/datalistNAPPnew",wexernei,".RData",sep="" )) #datalist, Lists of data per parish in wide format (sw.1,sw.2,...)
# load(file=paste(root,"NAPP/coordslistNAPPnew",wexernei,".RData",sep="" )) #coordslist, Lists of coordinates per parish
# load(file=paste(root,"NAPP/dneighboursNAPPnew",wexernei,".RData",sep="")) #dneighbours, Lists of neigbours identities per parish
# load(file=paste(root,"NAPP/wlistNAPPnew",wexernei,".RData",sep="")) #wlist, Sparse Lists of neighbours per parish
# load(paste("Mlmtwnew",wexernei,".RData",sep=""))

#source(paste(codes,"Mlogit_NAPPHeterofformula1Robustv2.R",sep=""))